Raw data:

In [7]:
# Preview the first five rows of the message table.
df.head(n=5)
Out[7]:
sender_name timestamp_ms content type photos gifs audio_files year month hour
0 Mélanie Amazo'Night Bergeot 2020-03-29 07:55:58.952 :p Generic NaN NaN NaN 2020 3 07:55
1 Nicolas Cailleux 2020-03-29 07:43:47.446 t'as de quoi faire un peu quand meme :p Generic NaN NaN NaN 2020 3 07:43
2 Nicolas Cailleux 2020-03-29 07:43:41.209 exactement ça va Generic NaN NaN NaN 2020 3 07:43
3 Nicolas Cailleux 2020-03-29 07:43:38.139 enfin 59340 Generic NaN NaN NaN 2020 3 07:43
4 Nicolas Cailleux 2020-03-29 07:42:49.732 60k msg Generic NaN NaN NaN 2020 3 07:42

The power of data:

In [9]:
# Report the number of days separating the first and the last recorded day.
print("".join([str(nb_day_first_last), ' day between ', str(first_day), ' and ', str(last_day)]))
1263 day between 2016-10-13 and 2020-03-29
In [10]:
# Count the "active" days: distinct calendar dates on which at least one
# message was sent.
# NOTE: test_activeday is a second name for df, not a copy, so the 'date'
# column created below is added to df itself.
test_activeday = df
message_dates = pd.to_datetime(test_activeday['timestamp_ms']).dt.date
test_activeday['date'] = message_dates
nb_active_days = test_activeday['date'].nunique()

# Share of active days over the whole period, truncated (not rounded) to a
# whole number of percent by int().
pc_active_days = int((nb_active_days * 100) / nb_day_first_last)

active_days_summary = (
    "%d (%d percent) of those were ‘active’ days (i.e. messages were sent)."
    % (nb_active_days, pc_active_days)
)
print(active_days_summary)
# TODO: see how to display this value as a percentage.
1084 (85 percent) of those were ‘active’ days (i.e. messages were sent).
In [11]:
# Message count and the difference between the two senders' counts,
# both computed in earlier cells.
msg_count_summary = (
    "%d messages. I sent %d more messages than my boyfriend"
    % (nb_msg_send, nb_diff_msg_send)
)
print(msg_count_summary)
59389 messages. I sent 1277 more messages than my boyfriend
In [13]:
# Vocabulary size overall and per sender, computed in earlier cells.
unique_word_summary = (
    "%d unique words were used (many of which are not real words). I used %d unique words where my boyfriend used %d."
    % (nb_unique_word, nb_unique_word_mel, nb_unique_word_nico)
)
print(unique_word_summary)
6927 unique words were used (many of which are not real words). I used 4244 unique words where my boyfriend used 2683.
In [14]:
# Fixed description of the two people in the conversation.
participants_note = "2 participants - my boyfriends and I - living in FR most of the time and in our early twenties."
print(participants_note)
2 participants - my boyfriends and I - living in FR most of the time and in our early twenties.

Message volume during our relationship

In [18]:
import datetime

# One wide figure holding the raw daily counts and their smoothed trend.
fig, ax = plt.subplots(figsize=(18, 10))

# Raw number of messages per day.
ax.plot(df_msg_day.index.values, df_msg_day['nb_msg_per_day'], color='green')

# Axis labels and title.
ax.set(
    xlabel="Date",
    ylabel="nb_msg_send",
    title="Daily Total messages send to each other\n",
)

# 150-day exponential moving average (EMA) of the daily counts.
# Alternatives tried previously: a 150-day simple moving average
# (rolling(window=150).mean()) and a 50-day EMA (ewm(span=50)).
exp3 = df_msg_day['nb_msg_per_day'].ewm(span=150, adjust=False).mean()
ax.plot(df_msg_day.index.values, exp3, label='Msg 150 Day EMA', color='blue')

# Vertical marker spanning the full height of the axes.
# The string is parsed as month-day-year, i.e. 8 February 2017.
date_str = '02-08-2017'
date_object = datetime.datetime.strptime(date_str, '%m-%d-%Y').date()
ax.axvline(date_object, 0, 1, label='Begining of our relation', color='pink')
ax.legend()

plt.show()

Wednesday I'm in love

In [23]:
import matplotlib.pyplot as plt

# Figure with a single axes that covers the whole figure area.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])

# Average message count and label for each day of the week.
avg_msg_day = df_msg_dayofweek['average_msg_day']
Day = df_msg_dayofweek['day']
y_pos = np.arange(len(Day))

# One colour per bar, in row order.
bar_colors = ['black', 'navy', 'teal', 'darkgreen', 'green', 'yellowgreen', 'yellow']

# Horizontal bars, one per day.
ax.barh(y_pos, avg_msg_day, color=bar_colors)

# Put the day names on the y-axis ticks.
ax.set_yticks(y_pos)
ax.set_yticklabels(Day)

ax.set(
    xlabel='Average msg send per day',
    title='Average number of messages per day of the week',
)

# Render the chart.
plt.show()
 

Time after time

In [25]:
# These imports are kept as in the original cell: they bind module-level names
# (numpy, plt, date2num, DateFormatter, dt) that other cells may rely on.
import numpy as numpy
import matplotlib.pyplot as plt
from matplotlib.dates import date2num, DateFormatter
import datetime as dt

# x: the index of df_msg_hour parsed as datetimes, then converted to
#    Matplotlib's numeric date representation.
#    (presumably the index holds 'HH:MM' strings -- TODO confirm)
# y: the 'average msg per minutes' column.
x = pd.to_datetime(df_msg_hour.index.values)
y = df_msg_hour['average msg per minutes']
x = date2num(x)

fig, ax = plt.subplots(figsize=(18, 10))

# Axes.plot_date is deprecated; the documented replacement is to declare the
# axis as a date axis and then call plot(). The original call also passed both
# the 'b-' format string and color='green', which makes Matplotlib warn that
# the colour is redundantly defined. The keyword took precedence, so the line
# was green; only the line style is kept in the format string here.
ax.xaxis_date()
ax.plot(x, y, '-', color='green')

# Show the tick labels as hours and minutes (HH:MM).
ax.xaxis.set_major_formatter(DateFormatter('%H:%M'))

# Set title and labels for axes.
ax.set(xlabel="Hour",
       ylabel="average msg per minutes",
       title="Hour of messages send to each other\n")

# Exponential moving average (span of 60 rows) drawn over the raw curve.
exp3 = y.ewm(span=60, adjust=False).mean()
ax.plot(x, exp3, label='Msg hour EMA', color='blue')

# Rotate the x tick labels so they do not overlap.
fig.autofmt_xdate()
ax.legend()
plt.show()

More than words

In [34]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

# Layout: one row with two cartesian panels, titled "Mel" (left) and "Nico" (right).
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "xy"}, {"type": "xy"}]],
    subplot_titles=("Mel", "Nico"),
)

# Options shared by both horizontal bar traces: text placed automatically on
# the bar, and only the x value shown on hover.
shared_bar_options = dict(textposition='auto', orientation='h', hoverinfo='x')

bar_nico = go.Bar(
    x=top_10_exp_nico_final['count'],
    y=top_10_exp_nico_final['vals'],
    text=top_10_exp_nico_final['vals'],
    marker_color=px.colors.qualitative.Safe[0],
    **shared_bar_options,
)
bar_mel = go.Bar(
    x=top_10_exp_mel_final['count'],
    y=top_10_exp_mel_final['vals'],
    text=top_10_exp_mel_final['vals'],
    marker_color=px.colors.sequential.Magenta[1],
    **shared_bar_options,
)

# Left panel (Mel): x-axis reversed, y-axis hidden.
fig.add_trace(bar_mel, row=1, col=1)
fig.update_xaxes(title_text=" ", autorange='reversed', row=1, col=1)
fig.update_yaxes(visible=False, row=1, col=1)

# Right panel (Nico): fixed x range, y-axis hidden.
fig.add_trace(bar_nico, row=1, col=2)
fig.update_xaxes(title_text=" ", range=[0, 180], row=1, col=2)
fig.update_yaxes(visible=False, row=1, col=2)

# Setting fig['layout']['xaxis']['autorange'] = "reversed" also works, but
# only for the first panel.

# Fixed 600x600 canvas, no legend, centred title.
fig.update_layout(
    title_text="Word-phrase most used",
    title_x=0.5,
    height=600,
    width=600,
    autosize=False,
    showlegend=False,
)

fig.show()
In [35]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Layout: one row with two cartesian panels, titled "Nico" (left) and "Mel" (right).
# `px` (plotly.express) is not imported in this cell; it is expected to be in
# scope already from an earlier cell.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "xy"}, {"type": "xy"}]],
    subplot_titles=("Nico", "Mel"),
)

# Options shared by both horizontal bar traces: text placed automatically on
# the bar, and only the x value shown on hover.
shared_bar_options = dict(textposition='auto', orientation='h', hoverinfo='x')

bar_nico = go.Bar(
    x=wordcount_Nico_10['count'],
    y=wordcount_Nico_10['vals'],
    text=wordcount_Nico_10['vals'],
    marker_color=px.colors.qualitative.Safe[0],
    **shared_bar_options,
)
bar_mel = go.Bar(
    x=wordcount_Mel_10['count'],
    y=wordcount_Mel_10['vals'],
    text=wordcount_Mel_10['vals'],
    marker_color=px.colors.sequential.Magenta[1],
    **shared_bar_options,
)

# Left panel (Nico): both axes reversed, no grid, y-axis hidden.
fig.add_trace(bar_nico, row=1, col=1)
fig.update_xaxes(title_text=" ", autorange='reversed', showgrid=False, row=1, col=1)
fig.update_yaxes(visible=False, autorange='reversed', showgrid=False, row=1, col=1)

# Right panel (Mel): fixed x range, reversed y-axis, no grid, y-axis hidden.
fig.add_trace(bar_mel, row=1, col=2)
fig.update_xaxes(title_text=" ", showgrid=False, range=[0, 3300], row=1, col=2)
fig.update_yaxes(visible=False, autorange='reversed', showgrid=False, row=1, col=2)

# These direct layout assignments also work, but only for the first panel:
#   fig['layout']['xaxis']['autorange'] = "reversed"
#   fig['layout']['xaxis']['range'] = [30, 0]

# Fixed 600x600 canvas, no legend, centred title.
fig.update_layout(
    title_text="Word most used",
    title_x=0.5,
    height=600,
    width=600,
    autosize=False,
    showlegend=False,
)

fig.show()
In [36]:
import plotly.graph_objects as go

# One horizontal bar trace per person.
# `px` (plotly.express) is expected to be in scope from an earlier cell.
trace_mel = go.Bar(
    name='Mel',
    x=wordcount_Mel_cheri['count'],
    y=wordcount_Mel_cheri['vals'],
    orientation='h',
    marker_color=px.colors.sequential.Magenta[1],
)
trace_nico = go.Bar(
    name='Nico',
    x=wordcount_Nico_chat['count'],
    y=wordcount_Nico_chat['vals'],
    orientation='h',
    marker_color=px.colors.qualitative.Safe[0],
)

fig = go.Figure(data=[trace_mel, trace_nico])

# Grouped bar mode, fixed 600x600 canvas, centred title.
fig.update_layout(
    barmode='group',
    height=600,
    width=600,
    autosize=False,
    title_text="Nickname lover's winner is ",
    title_x=0.5,
)
fig.show()
In [43]:
import matplotlib.animation as animation
from IPython.display import HTML

fig, ax = plt.subplots(figsize=(15,8))

# One animation frame per value in 2017..2020; FuncAnimation passes each
# value to draw_barchart_test (defined in another cell).
animator = animation.FuncAnimation(fig, draw_barchart_test, frames=range(2017, 2021))

# Render the animation to an HTML/JavaScript player first, then close the
# figure. Leaving it open makes the inline backend also display a static copy
# of the figure underneath the player.
animation_html = animator.to_jshtml()
plt.close(fig)

# Alternatives to to_jshtml(): animator.to_html5_video() or animator.save().
HTML(animation_html)
Out[43]: